{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Steam volume prediction\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn import tree\n",
    "from sklearn.tree import DecisionTreeRegressor\n",
    "from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor\n",
    "from sklearn.model_selection import cross_val_score,train_test_split,GridSearchCV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>V0</th>\n",
       "      <th>V1</th>\n",
       "      <th>V2</th>\n",
       "      <th>V3</th>\n",
       "      <th>V4</th>\n",
       "      <th>V5</th>\n",
       "      <th>V6</th>\n",
       "      <th>V7</th>\n",
       "      <th>V8</th>\n",
       "      <th>V9</th>\n",
       "      <th>...</th>\n",
       "      <th>V29</th>\n",
       "      <th>V30</th>\n",
       "      <th>V31</th>\n",
       "      <th>V32</th>\n",
       "      <th>V33</th>\n",
       "      <th>V34</th>\n",
       "      <th>V35</th>\n",
       "      <th>V36</th>\n",
       "      <th>V37</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.566</td>\n",
       "      <td>0.016</td>\n",
       "      <td>-0.143</td>\n",
       "      <td>0.407</td>\n",
       "      <td>0.452</td>\n",
       "      <td>-0.901</td>\n",
       "      <td>-1.812</td>\n",
       "      <td>-2.360</td>\n",
       "      <td>-0.436</td>\n",
       "      <td>-2.114</td>\n",
       "      <td>...</td>\n",
       "      <td>0.136</td>\n",
       "      <td>0.109</td>\n",
       "      <td>-0.615</td>\n",
       "      <td>0.327</td>\n",
       "      <td>-4.627</td>\n",
       "      <td>-4.789</td>\n",
       "      <td>-5.101</td>\n",
       "      <td>-2.608</td>\n",
       "      <td>-3.508</td>\n",
       "      <td>0.175</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.968</td>\n",
       "      <td>0.437</td>\n",
       "      <td>0.066</td>\n",
       "      <td>0.566</td>\n",
       "      <td>0.194</td>\n",
       "      <td>-0.893</td>\n",
       "      <td>-1.566</td>\n",
       "      <td>-2.360</td>\n",
       "      <td>0.332</td>\n",
       "      <td>-2.114</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.128</td>\n",
       "      <td>0.124</td>\n",
       "      <td>0.032</td>\n",
       "      <td>0.600</td>\n",
       "      <td>-0.843</td>\n",
       "      <td>0.160</td>\n",
       "      <td>0.364</td>\n",
       "      <td>-0.335</td>\n",
       "      <td>-0.730</td>\n",
       "      <td>0.676</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.013</td>\n",
       "      <td>0.568</td>\n",
       "      <td>0.235</td>\n",
       "      <td>0.370</td>\n",
       "      <td>0.112</td>\n",
       "      <td>-0.797</td>\n",
       "      <td>-1.367</td>\n",
       "      <td>-2.360</td>\n",
       "      <td>0.396</td>\n",
       "      <td>-2.114</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.009</td>\n",
       "      <td>0.361</td>\n",
       "      <td>0.277</td>\n",
       "      <td>-0.116</td>\n",
       "      <td>-0.843</td>\n",
       "      <td>0.160</td>\n",
       "      <td>0.364</td>\n",
       "      <td>0.765</td>\n",
       "      <td>-0.589</td>\n",
       "      <td>0.633</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.733</td>\n",
       "      <td>0.368</td>\n",
       "      <td>0.283</td>\n",
       "      <td>0.165</td>\n",
       "      <td>0.599</td>\n",
       "      <td>-0.679</td>\n",
       "      <td>-1.200</td>\n",
       "      <td>-2.086</td>\n",
       "      <td>0.403</td>\n",
       "      <td>-2.114</td>\n",
       "      <td>...</td>\n",
       "      <td>0.015</td>\n",
       "      <td>0.417</td>\n",
       "      <td>0.279</td>\n",
       "      <td>0.603</td>\n",
       "      <td>-0.843</td>\n",
       "      <td>-0.065</td>\n",
       "      <td>0.364</td>\n",
       "      <td>0.333</td>\n",
       "      <td>-0.112</td>\n",
       "      <td>0.206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.684</td>\n",
       "      <td>0.638</td>\n",
       "      <td>0.260</td>\n",
       "      <td>0.209</td>\n",
       "      <td>0.337</td>\n",
       "      <td>-0.454</td>\n",
       "      <td>-1.073</td>\n",
       "      <td>-2.086</td>\n",
       "      <td>0.314</td>\n",
       "      <td>-2.114</td>\n",
       "      <td>...</td>\n",
       "      <td>0.183</td>\n",
       "      <td>1.078</td>\n",
       "      <td>0.328</td>\n",
       "      <td>0.418</td>\n",
       "      <td>-0.843</td>\n",
       "      <td>-0.215</td>\n",
       "      <td>0.364</td>\n",
       "      <td>-0.280</td>\n",
       "      <td>-0.028</td>\n",
       "      <td>0.384</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2883</th>\n",
       "      <td>0.190</td>\n",
       "      <td>-0.025</td>\n",
       "      <td>-0.138</td>\n",
       "      <td>0.161</td>\n",
       "      <td>0.600</td>\n",
       "      <td>-0.212</td>\n",
       "      <td>0.757</td>\n",
       "      <td>0.584</td>\n",
       "      <td>-0.026</td>\n",
       "      <td>0.904</td>\n",
       "      <td>...</td>\n",
       "      <td>0.128</td>\n",
       "      <td>-0.208</td>\n",
       "      <td>0.809</td>\n",
       "      <td>-0.173</td>\n",
       "      <td>0.247</td>\n",
       "      <td>-0.027</td>\n",
       "      <td>-0.349</td>\n",
       "      <td>0.576</td>\n",
       "      <td>0.686</td>\n",
       "      <td>0.235</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2884</th>\n",
       "      <td>0.507</td>\n",
       "      <td>0.557</td>\n",
       "      <td>0.296</td>\n",
       "      <td>0.183</td>\n",
       "      <td>0.530</td>\n",
       "      <td>-0.237</td>\n",
       "      <td>0.749</td>\n",
       "      <td>0.584</td>\n",
       "      <td>0.537</td>\n",
       "      <td>0.904</td>\n",
       "      <td>...</td>\n",
       "      <td>0.291</td>\n",
       "      <td>-0.287</td>\n",
       "      <td>0.465</td>\n",
       "      <td>-0.310</td>\n",
       "      <td>0.763</td>\n",
       "      <td>0.498</td>\n",
       "      <td>-0.349</td>\n",
       "      <td>-0.615</td>\n",
       "      <td>-0.380</td>\n",
       "      <td>1.042</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2885</th>\n",
       "      <td>-0.394</td>\n",
       "      <td>-0.721</td>\n",
       "      <td>-0.485</td>\n",
       "      <td>0.084</td>\n",
       "      <td>0.136</td>\n",
       "      <td>0.034</td>\n",
       "      <td>0.655</td>\n",
       "      <td>0.614</td>\n",
       "      <td>-0.818</td>\n",
       "      <td>0.904</td>\n",
       "      <td>...</td>\n",
       "      <td>0.291</td>\n",
       "      <td>-0.179</td>\n",
       "      <td>0.268</td>\n",
       "      <td>0.552</td>\n",
       "      <td>0.763</td>\n",
       "      <td>0.498</td>\n",
       "      <td>-0.349</td>\n",
       "      <td>0.951</td>\n",
       "      <td>0.748</td>\n",
       "      <td>0.005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2886</th>\n",
       "      <td>-0.219</td>\n",
       "      <td>-0.282</td>\n",
       "      <td>-0.344</td>\n",
       "      <td>-0.049</td>\n",
       "      <td>0.449</td>\n",
       "      <td>-0.140</td>\n",
       "      <td>0.560</td>\n",
       "      <td>0.583</td>\n",
       "      <td>-0.596</td>\n",
       "      <td>0.904</td>\n",
       "      <td>...</td>\n",
       "      <td>0.216</td>\n",
       "      <td>1.061</td>\n",
       "      <td>-0.051</td>\n",
       "      <td>1.023</td>\n",
       "      <td>0.878</td>\n",
       "      <td>0.610</td>\n",
       "      <td>-0.230</td>\n",
       "      <td>-0.301</td>\n",
       "      <td>0.555</td>\n",
       "      <td>0.350</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2887</th>\n",
       "      <td>0.368</td>\n",
       "      <td>0.380</td>\n",
       "      <td>-0.225</td>\n",
       "      <td>-0.049</td>\n",
       "      <td>0.379</td>\n",
       "      <td>0.092</td>\n",
       "      <td>0.550</td>\n",
       "      <td>0.551</td>\n",
       "      <td>0.244</td>\n",
       "      <td>0.904</td>\n",
       "      <td>...</td>\n",
       "      <td>0.047</td>\n",
       "      <td>0.057</td>\n",
       "      <td>-0.042</td>\n",
       "      <td>0.847</td>\n",
       "      <td>0.534</td>\n",
       "      <td>-0.009</td>\n",
       "      <td>-0.190</td>\n",
       "      <td>-0.567</td>\n",
       "      <td>0.388</td>\n",
       "      <td>0.417</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2888 rows × 39 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         V0     V1     V2     V3     V4     V5     V6     V7     V8     V9  \\\n",
       "0     0.566  0.016 -0.143  0.407  0.452 -0.901 -1.812 -2.360 -0.436 -2.114   \n",
       "1     0.968  0.437  0.066  0.566  0.194 -0.893 -1.566 -2.360  0.332 -2.114   \n",
       "2     1.013  0.568  0.235  0.370  0.112 -0.797 -1.367 -2.360  0.396 -2.114   \n",
       "3     0.733  0.368  0.283  0.165  0.599 -0.679 -1.200 -2.086  0.403 -2.114   \n",
       "4     0.684  0.638  0.260  0.209  0.337 -0.454 -1.073 -2.086  0.314 -2.114   \n",
       "...     ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   \n",
       "2883  0.190 -0.025 -0.138  0.161  0.600 -0.212  0.757  0.584 -0.026  0.904   \n",
       "2884  0.507  0.557  0.296  0.183  0.530 -0.237  0.749  0.584  0.537  0.904   \n",
       "2885 -0.394 -0.721 -0.485  0.084  0.136  0.034  0.655  0.614 -0.818  0.904   \n",
       "2886 -0.219 -0.282 -0.344 -0.049  0.449 -0.140  0.560  0.583 -0.596  0.904   \n",
       "2887  0.368  0.380 -0.225 -0.049  0.379  0.092  0.550  0.551  0.244  0.904   \n",
       "\n",
       "      ...    V29    V30    V31    V32    V33    V34    V35    V36    V37  \\\n",
       "0     ...  0.136  0.109 -0.615  0.327 -4.627 -4.789 -5.101 -2.608 -3.508   \n",
       "1     ... -0.128  0.124  0.032  0.600 -0.843  0.160  0.364 -0.335 -0.730   \n",
       "2     ... -0.009  0.361  0.277 -0.116 -0.843  0.160  0.364  0.765 -0.589   \n",
       "3     ...  0.015  0.417  0.279  0.603 -0.843 -0.065  0.364  0.333 -0.112   \n",
       "4     ...  0.183  1.078  0.328  0.418 -0.843 -0.215  0.364 -0.280 -0.028   \n",
       "...   ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   \n",
       "2883  ...  0.128 -0.208  0.809 -0.173  0.247 -0.027 -0.349  0.576  0.686   \n",
       "2884  ...  0.291 -0.287  0.465 -0.310  0.763  0.498 -0.349 -0.615 -0.380   \n",
       "2885  ...  0.291 -0.179  0.268  0.552  0.763  0.498 -0.349  0.951  0.748   \n",
       "2886  ...  0.216  1.061 -0.051  1.023  0.878  0.610 -0.230 -0.301  0.555   \n",
       "2887  ...  0.047  0.057 -0.042  0.847  0.534 -0.009 -0.190 -0.567  0.388   \n",
       "\n",
       "      target  \n",
       "0      0.175  \n",
       "1      0.676  \n",
       "2      0.633  \n",
       "3      0.206  \n",
       "4      0.384  \n",
       "...      ...  \n",
       "2883   0.235  \n",
       "2884   1.042  \n",
       "2885   0.005  \n",
       "2886   0.350  \n",
       "2887   0.417  \n",
       "\n",
       "[2888 rows x 39 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the training set from a tab-separated text file in the working directory.\n",
    "train_data = pd.read_csv(\n",
    "    'zhengqi_train.txt',\n",
    "    sep='\\t',\n",
    ")\n",
    "# Bare last expression: render the loaded frame as the cell output.\n",
    "train_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "18"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only the rows whose V9 value is greater than -7.5\n",
    "# (in the recorded run this drops 2 of the 2888 rows).\n",
    "train0 = train_data.loc[train_data['V9'].gt(-7.5)]\n",
    "\n",
    "# Column subset: 17 V-columns plus 'target', in this exact order.\n",
    "# NOTE(review): presumably used downstream to select columns from train0 -- confirm.\n",
    "new_numerical = [\n",
    "    'V0', 'V1', 'V8', 'V27', 'V31', 'V2',\n",
    "    'V3', 'V37', 'V20', 'V10', 'V5', 'V24',\n",
    "    'V13', 'V19', 'V18', 'V30', 'V22', 'target',\n",
    "]\n",
    "# Show how many column names were selected.\n",
    "len(new_numerical)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>V0</th>\n",
       "      <th>V1</th>\n",
       "      <th>V2</th>\n",
       "      <th>V3</th>\n",
       "      <th>V4</th>\n",
       "      <th>V5</th>\n",
       "      <th>V6</th>\n",
       "      <th>V7</th>\n",
       "      <th>V8</th>\n",
       "      <th>V9</th>\n",
       "      <th>...</th>\n",
       "      <th>V29</th>\n",
       "      <th>V30</th>\n",
       "      <th>V31</th>\n",
       "      <th>V32</th>\n",
       "      <th>V33</th>\n",
       "      <th>V34</th>\n",
       "      <th>V35</th>\n",
       "      <th>V36</th>\n",
       "      <th>V37</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.566</td>\n",
       "      <td>0.016</td>\n",
       "      <td>-0.143</td>\n",
       "      <td>0.407</td>\n",
       "      <td>0.452</td>\n",
       "      <td>-0.901</td>\n",
       "      <td>-1.812</td>\n",
       "      <td>-2.360</td>\n",
       "      <td>-0.436</td>\n",
       "      <td>-2.114</td>\n",
       "      <td>...</td>\n",
       "      <td>0.136</td>\n",
       "      <td>0.109</td>\n",
       "      <td>-0.615</td>\n",
       "      <td>0.327</td>\n",
       "      <td>-4.627</td>\n",
       "      <td>-4.789</td>\n",
       "      <td>-5.101</td>\n",
       "      <td>-2.608</td>\n",
       "      <td>-3.508</td>\n",
       "      <td>0.175</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.968</td>\n",
       "      <td>0.437</td>\n",
       "      <td>0.066</td>\n",
       "      <td>0.566</td>\n",
       "      <td>0.194</td>\n",
       "      <td>-0.893</td>\n",
       "      <td>-1.566</td>\n",
       "      <td>-2.360</td>\n",
       "      <td>0.332</td>\n",
       "      <td>-2.114</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.128</td>\n",
       "      <td>0.124</td>\n",
       "      <td>0.032</td>\n",
       "      <td>0.600</td>\n",
       "      <td>-0.843</td>\n",
       "      <td>0.160</td>\n",
       "      <td>0.364</td>\n",
       "      <td>-0.335</td>\n",
       "      <td>-0.730</td>\n",
       "      <td>0.676</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.013</td>\n",
       "      <td>0.568</td>\n",
       "      <td>0.235</td>\n",
       "      <td>0.370</td>\n",
       "      <td>0.112</td>\n",
       "      <td>-0.797</td>\n",
       "      <td>-1.367</td>\n",
       "      <td>-2.360</td>\n",
       "      <td>0.396</td>\n",
       "      <td>-2.114</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.009</td>\n",
       "      <td>0.361</td>\n",
       "      <td>0.277</td>\n",
       "      <td>-0.116</td>\n",
       "      <td>-0.843</td>\n",
       "      <td>0.160</td>\n",
       "      <td>0.364</td>\n",
       "      <td>0.765</td>\n",
       "      <td>-0.589</td>\n",
       "      <td>0.633</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.733</td>\n",
       "      <td>0.368</td>\n",
       "      <td>0.283</td>\n",
       "      <td>0.165</td>\n",
       "      <td>0.599</td>\n",
       "      <td>-0.679</td>\n",
       "      <td>-1.200</td>\n",
       "      <td>-2.086</td>\n",
       "      <td>0.403</td>\n",
       "      <td>-2.114</td>\n",
       "      <td>...</td>\n",
       "      <td>0.015</td>\n",
       "      <td>0.417</td>\n",
       "      <td>0.279</td>\n",
       "      <td>0.603</td>\n",
       "      <td>-0.843</td>\n",
       "      <td>-0.065</td>\n",
       "      <td>0.364</td>\n",
       "      <td>0.333</td>\n",
       "      <td>-0.112</td>\n",
       "      <td>0.206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.684</td>\n",
       "      <td>0.638</td>\n",
       "      <td>0.260</td>\n",
       "      <td>0.209</td>\n",
       "      <td>0.337</td>\n",
       "      <td>-0.454</td>\n",
       "      <td>-1.073</td>\n",
       "      <td>-2.086</td>\n",
       "      <td>0.314</td>\n",
       "      <td>-2.114</td>\n",
       "      <td>...</td>\n",
       "      <td>0.183</td>\n",
       "      <td>1.078</td>\n",
       "      <td>0.328</td>\n",
       "      <td>0.418</td>\n",
       "      <td>-0.843</td>\n",
       "      <td>-0.215</td>\n",
       "      <td>0.364</td>\n",
       "      <td>-0.280</td>\n",
       "      <td>-0.028</td>\n",
       "      <td>0.384</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2883</th>\n",
       "      <td>0.190</td>\n",
       "      <td>-0.025</td>\n",
       "      <td>-0.138</td>\n",
       "      <td>0.161</td>\n",
       "      <td>0.600</td>\n",
       "      <td>-0.212</td>\n",
       "      <td>0.757</td>\n",
       "      <td>0.584</td>\n",
       "      <td>-0.026</td>\n",
       "      <td>0.904</td>\n",
       "      <td>...</td>\n",
       "      <td>0.128</td>\n",
       "      <td>-0.208</td>\n",
       "      <td>0.809</td>\n",
       "      <td>-0.173</td>\n",
       "      <td>0.247</td>\n",
       "      <td>-0.027</td>\n",
       "      <td>-0.349</td>\n",
       "      <td>0.576</td>\n",
       "      <td>0.686</td>\n",
       "      <td>0.235</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2884</th>\n",
       "      <td>0.507</td>\n",
       "      <td>0.557</td>\n",
       "      <td>0.296</td>\n",
       "      <td>0.183</td>\n",
       "      <td>0.530</td>\n",
       "      <td>-0.237</td>\n",
       "      <td>0.749</td>\n",
       "      <td>0.584</td>\n",
       "      <td>0.537</td>\n",
       "      <td>0.904</td>\n",
       "      <td>...</td>\n",
       "      <td>0.291</td>\n",
       "      <td>-0.287</td>\n",
       "      <td>0.465</td>\n",
       "      <td>-0.310</td>\n",
       "      <td>0.763</td>\n",
       "      <td>0.498</td>\n",
       "      <td>-0.349</td>\n",
       "      <td>-0.615</td>\n",
       "      <td>-0.380</td>\n",
       "      <td>1.042</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2885</th>\n",
       "      <td>-0.394</td>\n",
       "      <td>-0.721</td>\n",
       "      <td>-0.485</td>\n",
       "      <td>0.084</td>\n",
       "      <td>0.136</td>\n",
       "      <td>0.034</td>\n",
       "      <td>0.655</td>\n",
       "      <td>0.614</td>\n",
       "      <td>-0.818</td>\n",
       "      <td>0.904</td>\n",
       "      <td>...</td>\n",
       "      <td>0.291</td>\n",
       "      <td>-0.179</td>\n",
       "      <td>0.268</td>\n",
       "      <td>0.552</td>\n",
       "      <td>0.763</td>\n",
       "      <td>0.498</td>\n",
       "      <td>-0.349</td>\n",
       "      <td>0.951</td>\n",
       "      <td>0.748</td>\n",
       "      <td>0.005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2886</th>\n",
       "      <td>-0.219</td>\n",
       "      <td>-0.282</td>\n",
       "      <td>-0.344</td>\n",
       "      <td>-0.049</td>\n",
       "      <td>0.449</td>\n",
       "      <td>-0.140</td>\n",
       "      <td>0.560</td>\n",
       "      <td>0.583</td>\n",
       "      <td>-0.596</td>\n",
       "      <td>0.904</td>\n",
       "      <td>...</td>\n",
       "      <td>0.216</td>\n",
       "      <td>1.061</td>\n",
       "      <td>-0.051</td>\n",
       "      <td>1.023</td>\n",
       "      <td>0.878</td>\n",
       "      <td>0.610</td>\n",
       "      <td>-0.230</td>\n",
       "      <td>-0.301</td>\n",
       "      <td>0.555</td>\n",
       "      <td>0.350</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2887</th>\n",
       "      <td>0.368</td>\n",
       "      <td>0.380</td>\n",
       "      <td>-0.225</td>\n",
       "      <td>-0.049</td>\n",
       "      <td>0.379</td>\n",
       "      <td>0.092</td>\n",
       "      <td>0.550</td>\n",
       "      <td>0.551</td>\n",
       "      <td>0.244</td>\n",
       "      <td>0.904</td>\n",
       "      <td>...</td>\n",
       "      <td>0.047</td>\n",
       "      <td>0.057</td>\n",
       "      <td>-0.042</td>\n",
       "      <td>0.847</td>\n",
       "      <td>0.534</td>\n",
       "      <td>-0.009</td>\n",
       "      <td>-0.190</td>\n",
       "      <td>-0.567</td>\n",
       "      <td>0.388</td>\n",
       "      <td>0.417</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2886 rows × 39 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         V0     V1     V2     V3     V4     V5     V6     V7     V8     V9  \\\n",
       "0     0.566  0.016 -0.143  0.407  0.452 -0.901 -1.812 -2.360 -0.436 -2.114   \n",
       "1     0.968  0.437  0.066  0.566  0.194 -0.893 -1.566 -2.360  0.332 -2.114   \n",
       "2     1.013  0.568  0.235  0.370  0.112 -0.797 -1.367 -2.360  0.396 -2.114   \n",
       "3     0.733  0.368  0.283  0.165  0.599 -0.679 -1.200 -2.086  0.403 -2.114   \n",
       "4     0.684  0.638  0.260  0.209  0.337 -0.454 -1.073 -2.086  0.314 -2.114   \n",
       "...     ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   \n",
       "2883  0.190 -0.025 -0.138  0.161  0.600 -0.212  0.757  0.584 -0.026  0.904   \n",
       "2884  0.507  0.557  0.296  0.183  0.530 -0.237  0.749  0.584  0.537  0.904   \n",
       "2885 -0.394 -0.721 -0.485  0.084  0.136  0.034  0.655  0.614 -0.818  0.904   \n",
       "2886 -0.219 -0.282 -0.344 -0.049  0.449 -0.140  0.560  0.583 -0.596  0.904   \n",
       "2887  0.368  0.380 -0.225 -0.049  0.379  0.092  0.550  0.551  0.244  0.904   \n",
       "\n",
       "      ...    V29    V30    V31    V32    V33    V34    V35    V36    V37  \\\n",
       "0     ...  0.136  0.109 -0.615  0.327 -4.627 -4.789 -5.101 -2.608 -3.508   \n",
       "1     ... -0.128  0.124  0.032  0.600 -0.843  0.160  0.364 -0.335 -0.730   \n",
       "2     ... -0.009  0.361  0.277 -0.116 -0.843  0.160  0.364  0.765 -0.589   \n",
       "3     ...  0.015  0.417  0.279  0.603 -0.843 -0.065  0.364  0.333 -0.112   \n",
       "4     ...  0.183  1.078  0.328  0.418 -0.843 -0.215  0.364 -0.280 -0.028   \n",
       "...   ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   \n",
       "2883  ...  0.128 -0.208  0.809 -0.173  0.247 -0.027 -0.349  0.576  0.686   \n",
       "2884  ...  0.291 -0.287  0.465 -0.310  0.763  0.498 -0.349 -0.615 -0.380   \n",
       "2885  ...  0.291 -0.179  0.268  0.552  0.763  0.498 -0.349  0.951  0.748   \n",
       "2886  ...  0.216  1.061 -0.051  1.023  0.878  0.610 -0.230 -0.301  0.555   \n",
       "2887  ...  0.047  0.057 -0.042  0.847  0.534 -0.009 -0.190 -0.567  0.388   \n",
       "\n",
       "      target  \n",
       "0      0.175  \n",
       "1      0.676  \n",
       "2      0.633  \n",
       "3      0.206  \n",
       "4      0.384  \n",
       "...      ...  \n",
       "2883   0.235  \n",
       "2884   1.042  \n",
       "2885   0.005  \n",
       "2886   0.350  \n",
       "2887   0.417  \n",
       "\n",
       "[2886 rows x 39 columns]"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the filtered frame (rows of train_data with V9 > -7.5) as the cell output.\n",
    "train0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>V0</th>\n",
       "      <th>V1</th>\n",
       "      <th>V8</th>\n",
       "      <th>V27</th>\n",
       "      <th>V31</th>\n",
       "      <th>V2</th>\n",
       "      <th>V3</th>\n",
       "      <th>V37</th>\n",
       "      <th>V20</th>\n",
       "      <th>V10</th>\n",
       "      <th>V5</th>\n",
       "      <th>V24</th>\n",
       "      <th>V13</th>\n",
       "      <th>V19</th>\n",
       "      <th>V18</th>\n",
       "      <th>V30</th>\n",
       "      <th>V22</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.566</td>\n",
       "      <td>0.016</td>\n",
       "      <td>-0.436</td>\n",
       "      <td>0.168</td>\n",
       "      <td>-0.615</td>\n",
       "      <td>-0.143</td>\n",
       "      <td>0.407</td>\n",
       "      <td>-3.508</td>\n",
       "      <td>0.610</td>\n",
       "      <td>-0.940</td>\n",
       "      <td>-0.901</td>\n",
       "      <td>0.800</td>\n",
       "      <td>0.550</td>\n",
       "      <td>-0.991</td>\n",
       "      <td>-0.573</td>\n",
       "      <td>0.109</td>\n",
       "      <td>-0.063</td>\n",
       "      <td>0.175</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.968</td>\n",
       "      <td>0.437</td>\n",
       "      <td>0.332</td>\n",
       "      <td>0.338</td>\n",
       "      <td>0.032</td>\n",
       "      <td>0.066</td>\n",
       "      <td>0.566</td>\n",
       "      <td>-0.730</td>\n",
       "      <td>0.588</td>\n",
       "      <td>0.188</td>\n",
       "      <td>-0.893</td>\n",
       "      <td>0.801</td>\n",
       "      <td>1.109</td>\n",
       "      <td>-0.836</td>\n",
       "      <td>-0.571</td>\n",
       "      <td>0.124</td>\n",
       "      <td>-0.063</td>\n",
       "      <td>0.676</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.013</td>\n",
       "      <td>0.568</td>\n",
       "      <td>0.396</td>\n",
       "      <td>0.326</td>\n",
       "      <td>0.277</td>\n",
       "      <td>0.235</td>\n",
       "      <td>0.370</td>\n",
       "      <td>-0.589</td>\n",
       "      <td>0.576</td>\n",
       "      <td>0.874</td>\n",
       "      <td>-0.797</td>\n",
       "      <td>0.961</td>\n",
       "      <td>0.767</td>\n",
       "      <td>-0.558</td>\n",
       "      <td>-0.564</td>\n",
       "      <td>0.361</td>\n",
       "      <td>-0.063</td>\n",
       "      <td>0.633</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.733</td>\n",
       "      <td>0.368</td>\n",
       "      <td>0.403</td>\n",
       "      <td>0.277</td>\n",
       "      <td>0.279</td>\n",
       "      <td>0.283</td>\n",
       "      <td>0.165</td>\n",
       "      <td>-0.112</td>\n",
       "      <td>0.272</td>\n",
       "      <td>0.011</td>\n",
       "      <td>-0.679</td>\n",
       "      <td>1.435</td>\n",
       "      <td>0.769</td>\n",
       "      <td>-0.564</td>\n",
       "      <td>-0.574</td>\n",
       "      <td>0.417</td>\n",
       "      <td>-0.063</td>\n",
       "      <td>0.206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.684</td>\n",
       "      <td>0.638</td>\n",
       "      <td>0.314</td>\n",
       "      <td>0.332</td>\n",
       "      <td>0.328</td>\n",
       "      <td>0.260</td>\n",
       "      <td>0.209</td>\n",
       "      <td>-0.028</td>\n",
       "      <td>0.106</td>\n",
       "      <td>-0.251</td>\n",
       "      <td>-0.454</td>\n",
       "      <td>0.881</td>\n",
       "      <td>-0.349</td>\n",
       "      <td>-0.394</td>\n",
       "      <td>-0.572</td>\n",
       "      <td>1.078</td>\n",
       "      <td>-0.259</td>\n",
       "      <td>0.384</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2883</th>\n",
       "      <td>0.190</td>\n",
       "      <td>-0.025</td>\n",
       "      <td>-0.026</td>\n",
       "      <td>0.189</td>\n",
       "      <td>0.809</td>\n",
       "      <td>-0.138</td>\n",
       "      <td>0.161</td>\n",
       "      <td>0.686</td>\n",
       "      <td>0.598</td>\n",
       "      <td>0.355</td>\n",
       "      <td>-0.212</td>\n",
       "      <td>-1.310</td>\n",
       "      <td>0.141</td>\n",
       "      <td>1.003</td>\n",
       "      <td>0.454</td>\n",
       "      <td>-0.208</td>\n",
       "      <td>0.314</td>\n",
       "      <td>0.235</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2884</th>\n",
       "      <td>0.507</td>\n",
       "      <td>0.557</td>\n",
       "      <td>0.537</td>\n",
       "      <td>0.372</td>\n",
       "      <td>0.465</td>\n",
       "      <td>0.296</td>\n",
       "      <td>0.183</td>\n",
       "      <td>-0.380</td>\n",
       "      <td>0.562</td>\n",
       "      <td>-0.061</td>\n",
       "      <td>-0.237</td>\n",
       "      <td>-1.314</td>\n",
       "      <td>-0.634</td>\n",
       "      <td>1.076</td>\n",
       "      <td>0.808</td>\n",
       "      <td>-0.287</td>\n",
       "      <td>0.314</td>\n",
       "      <td>1.042</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2885</th>\n",
       "      <td>-0.394</td>\n",
       "      <td>-0.721</td>\n",
       "      <td>-0.818</td>\n",
       "      <td>0.058</td>\n",
       "      <td>0.268</td>\n",
       "      <td>-0.485</td>\n",
       "      <td>0.084</td>\n",
       "      <td>0.748</td>\n",
       "      <td>-0.188</td>\n",
       "      <td>0.240</td>\n",
       "      <td>0.034</td>\n",
       "      <td>-1.310</td>\n",
       "      <td>0.389</td>\n",
       "      <td>1.042</td>\n",
       "      <td>0.813</td>\n",
       "      <td>-0.179</td>\n",
       "      <td>0.133</td>\n",
       "      <td>0.005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2886</th>\n",
       "      <td>-0.219</td>\n",
       "      <td>-0.282</td>\n",
       "      <td>-0.596</td>\n",
       "      <td>0.133</td>\n",
       "      <td>-0.051</td>\n",
       "      <td>-0.344</td>\n",
       "      <td>-0.049</td>\n",
       "      <td>0.555</td>\n",
       "      <td>-0.497</td>\n",
       "      <td>-0.395</td>\n",
       "      <td>-0.140</td>\n",
       "      <td>-1.313</td>\n",
       "      <td>-0.310</td>\n",
       "      <td>1.129</td>\n",
       "      <td>0.803</td>\n",
       "      <td>1.061</td>\n",
       "      <td>0.019</td>\n",
       "      <td>0.350</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2887</th>\n",
       "      <td>0.368</td>\n",
       "      <td>0.380</td>\n",
       "      <td>0.244</td>\n",
       "      <td>0.208</td>\n",
       "      <td>-0.042</td>\n",
       "      <td>-0.225</td>\n",
       "      <td>-0.049</td>\n",
       "      <td>0.388</td>\n",
       "      <td>-0.252</td>\n",
       "      <td>-0.419</td>\n",
       "      <td>0.092</td>\n",
       "      <td>-1.314</td>\n",
       "      <td>-0.114</td>\n",
       "      <td>0.899</td>\n",
       "      <td>0.247</td>\n",
       "      <td>0.057</td>\n",
       "      <td>-0.063</td>\n",
       "      <td>0.417</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2886 rows × 18 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         V0     V1     V8    V27    V31     V2     V3    V37    V20    V10  \\\n",
       "0     0.566  0.016 -0.436  0.168 -0.615 -0.143  0.407 -3.508  0.610 -0.940   \n",
       "1     0.968  0.437  0.332  0.338  0.032  0.066  0.566 -0.730  0.588  0.188   \n",
       "2     1.013  0.568  0.396  0.326  0.277  0.235  0.370 -0.589  0.576  0.874   \n",
       "3     0.733  0.368  0.403  0.277  0.279  0.283  0.165 -0.112  0.272  0.011   \n",
       "4     0.684  0.638  0.314  0.332  0.328  0.260  0.209 -0.028  0.106 -0.251   \n",
       "...     ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   \n",
       "2883  0.190 -0.025 -0.026  0.189  0.809 -0.138  0.161  0.686  0.598  0.355   \n",
       "2884  0.507  0.557  0.537  0.372  0.465  0.296  0.183 -0.380  0.562 -0.061   \n",
       "2885 -0.394 -0.721 -0.818  0.058  0.268 -0.485  0.084  0.748 -0.188  0.240   \n",
       "2886 -0.219 -0.282 -0.596  0.133 -0.051 -0.344 -0.049  0.555 -0.497 -0.395   \n",
       "2887  0.368  0.380  0.244  0.208 -0.042 -0.225 -0.049  0.388 -0.252 -0.419   \n",
       "\n",
       "         V5    V24    V13    V19    V18    V30    V22  target  \n",
       "0    -0.901  0.800  0.550 -0.991 -0.573  0.109 -0.063   0.175  \n",
       "1    -0.893  0.801  1.109 -0.836 -0.571  0.124 -0.063   0.676  \n",
       "2    -0.797  0.961  0.767 -0.558 -0.564  0.361 -0.063   0.633  \n",
       "3    -0.679  1.435  0.769 -0.564 -0.574  0.417 -0.063   0.206  \n",
       "4    -0.454  0.881 -0.349 -0.394 -0.572  1.078 -0.259   0.384  \n",
       "...     ...    ...    ...    ...    ...    ...    ...     ...  \n",
       "2883 -0.212 -1.310  0.141  1.003  0.454 -0.208  0.314   0.235  \n",
       "2884 -0.237 -1.314 -0.634  1.076  0.808 -0.287  0.314   1.042  \n",
       "2885  0.034 -1.310  0.389  1.042  0.813 -0.179  0.133   0.005  \n",
       "2886 -0.140 -1.313 -0.310  1.129  0.803  1.061  0.019   0.350  \n",
       "2887  0.092 -1.314 -0.114  0.899  0.247  0.057 -0.063   0.417  \n",
       "\n",
       "[2886 rows x 18 columns]"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train1 = train0[new_numerical]\n",
    "train1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>V0</th>\n",
       "      <th>V1</th>\n",
       "      <th>V8</th>\n",
       "      <th>V27</th>\n",
       "      <th>V31</th>\n",
       "      <th>V2</th>\n",
       "      <th>V3</th>\n",
       "      <th>V37</th>\n",
       "      <th>V20</th>\n",
       "      <th>V10</th>\n",
       "      <th>V5</th>\n",
       "      <th>V24</th>\n",
       "      <th>V13</th>\n",
       "      <th>V19</th>\n",
       "      <th>V18</th>\n",
       "      <th>V30</th>\n",
       "      <th>V22</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.566</td>\n",
       "      <td>0.016</td>\n",
       "      <td>-0.436</td>\n",
       "      <td>0.168</td>\n",
       "      <td>-0.615</td>\n",
       "      <td>-0.143</td>\n",
       "      <td>0.407</td>\n",
       "      <td>-3.508</td>\n",
       "      <td>0.610</td>\n",
       "      <td>-0.940</td>\n",
       "      <td>-0.901</td>\n",
       "      <td>0.800</td>\n",
       "      <td>0.550</td>\n",
       "      <td>-0.991</td>\n",
       "      <td>-0.573</td>\n",
       "      <td>0.109</td>\n",
       "      <td>-0.063</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.968</td>\n",
       "      <td>0.437</td>\n",
       "      <td>0.332</td>\n",
       "      <td>0.338</td>\n",
       "      <td>0.032</td>\n",
       "      <td>0.066</td>\n",
       "      <td>0.566</td>\n",
       "      <td>-0.730</td>\n",
       "      <td>0.588</td>\n",
       "      <td>0.188</td>\n",
       "      <td>-0.893</td>\n",
       "      <td>0.801</td>\n",
       "      <td>1.109</td>\n",
       "      <td>-0.836</td>\n",
       "      <td>-0.571</td>\n",
       "      <td>0.124</td>\n",
       "      <td>-0.063</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.013</td>\n",
       "      <td>0.568</td>\n",
       "      <td>0.396</td>\n",
       "      <td>0.326</td>\n",
       "      <td>0.277</td>\n",
       "      <td>0.235</td>\n",
       "      <td>0.370</td>\n",
       "      <td>-0.589</td>\n",
       "      <td>0.576</td>\n",
       "      <td>0.874</td>\n",
       "      <td>-0.797</td>\n",
       "      <td>0.961</td>\n",
       "      <td>0.767</td>\n",
       "      <td>-0.558</td>\n",
       "      <td>-0.564</td>\n",
       "      <td>0.361</td>\n",
       "      <td>-0.063</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.733</td>\n",
       "      <td>0.368</td>\n",
       "      <td>0.403</td>\n",
       "      <td>0.277</td>\n",
       "      <td>0.279</td>\n",
       "      <td>0.283</td>\n",
       "      <td>0.165</td>\n",
       "      <td>-0.112</td>\n",
       "      <td>0.272</td>\n",
       "      <td>0.011</td>\n",
       "      <td>-0.679</td>\n",
       "      <td>1.435</td>\n",
       "      <td>0.769</td>\n",
       "      <td>-0.564</td>\n",
       "      <td>-0.574</td>\n",
       "      <td>0.417</td>\n",
       "      <td>-0.063</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.684</td>\n",
       "      <td>0.638</td>\n",
       "      <td>0.314</td>\n",
       "      <td>0.332</td>\n",
       "      <td>0.328</td>\n",
       "      <td>0.260</td>\n",
       "      <td>0.209</td>\n",
       "      <td>-0.028</td>\n",
       "      <td>0.106</td>\n",
       "      <td>-0.251</td>\n",
       "      <td>-0.454</td>\n",
       "      <td>0.881</td>\n",
       "      <td>-0.349</td>\n",
       "      <td>-0.394</td>\n",
       "      <td>-0.572</td>\n",
       "      <td>1.078</td>\n",
       "      <td>-0.259</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2883</th>\n",
       "      <td>0.190</td>\n",
       "      <td>-0.025</td>\n",
       "      <td>-0.026</td>\n",
       "      <td>0.189</td>\n",
       "      <td>0.809</td>\n",
       "      <td>-0.138</td>\n",
       "      <td>0.161</td>\n",
       "      <td>0.686</td>\n",
       "      <td>0.598</td>\n",
       "      <td>0.355</td>\n",
       "      <td>-0.212</td>\n",
       "      <td>-1.310</td>\n",
       "      <td>0.141</td>\n",
       "      <td>1.003</td>\n",
       "      <td>0.454</td>\n",
       "      <td>-0.208</td>\n",
       "      <td>0.314</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2884</th>\n",
       "      <td>0.507</td>\n",
       "      <td>0.557</td>\n",
       "      <td>0.537</td>\n",
       "      <td>0.372</td>\n",
       "      <td>0.465</td>\n",
       "      <td>0.296</td>\n",
       "      <td>0.183</td>\n",
       "      <td>-0.380</td>\n",
       "      <td>0.562</td>\n",
       "      <td>-0.061</td>\n",
       "      <td>-0.237</td>\n",
       "      <td>-1.314</td>\n",
       "      <td>-0.634</td>\n",
       "      <td>1.076</td>\n",
       "      <td>0.808</td>\n",
       "      <td>-0.287</td>\n",
       "      <td>0.314</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2885</th>\n",
       "      <td>-0.394</td>\n",
       "      <td>-0.721</td>\n",
       "      <td>-0.818</td>\n",
       "      <td>0.058</td>\n",
       "      <td>0.268</td>\n",
       "      <td>-0.485</td>\n",
       "      <td>0.084</td>\n",
       "      <td>0.748</td>\n",
       "      <td>-0.188</td>\n",
       "      <td>0.240</td>\n",
       "      <td>0.034</td>\n",
       "      <td>-1.310</td>\n",
       "      <td>0.389</td>\n",
       "      <td>1.042</td>\n",
       "      <td>0.813</td>\n",
       "      <td>-0.179</td>\n",
       "      <td>0.133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2886</th>\n",
       "      <td>-0.219</td>\n",
       "      <td>-0.282</td>\n",
       "      <td>-0.596</td>\n",
       "      <td>0.133</td>\n",
       "      <td>-0.051</td>\n",
       "      <td>-0.344</td>\n",
       "      <td>-0.049</td>\n",
       "      <td>0.555</td>\n",
       "      <td>-0.497</td>\n",
       "      <td>-0.395</td>\n",
       "      <td>-0.140</td>\n",
       "      <td>-1.313</td>\n",
       "      <td>-0.310</td>\n",
       "      <td>1.129</td>\n",
       "      <td>0.803</td>\n",
       "      <td>1.061</td>\n",
       "      <td>0.019</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2887</th>\n",
       "      <td>0.368</td>\n",
       "      <td>0.380</td>\n",
       "      <td>0.244</td>\n",
       "      <td>0.208</td>\n",
       "      <td>-0.042</td>\n",
       "      <td>-0.225</td>\n",
       "      <td>-0.049</td>\n",
       "      <td>0.388</td>\n",
       "      <td>-0.252</td>\n",
       "      <td>-0.419</td>\n",
       "      <td>0.092</td>\n",
       "      <td>-1.314</td>\n",
       "      <td>-0.114</td>\n",
       "      <td>0.899</td>\n",
       "      <td>0.247</td>\n",
       "      <td>0.057</td>\n",
       "      <td>-0.063</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2886 rows × 17 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         V0     V1     V8    V27    V31     V2     V3    V37    V20    V10  \\\n",
       "0     0.566  0.016 -0.436  0.168 -0.615 -0.143  0.407 -3.508  0.610 -0.940   \n",
       "1     0.968  0.437  0.332  0.338  0.032  0.066  0.566 -0.730  0.588  0.188   \n",
       "2     1.013  0.568  0.396  0.326  0.277  0.235  0.370 -0.589  0.576  0.874   \n",
       "3     0.733  0.368  0.403  0.277  0.279  0.283  0.165 -0.112  0.272  0.011   \n",
       "4     0.684  0.638  0.314  0.332  0.328  0.260  0.209 -0.028  0.106 -0.251   \n",
       "...     ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   \n",
       "2883  0.190 -0.025 -0.026  0.189  0.809 -0.138  0.161  0.686  0.598  0.355   \n",
       "2884  0.507  0.557  0.537  0.372  0.465  0.296  0.183 -0.380  0.562 -0.061   \n",
       "2885 -0.394 -0.721 -0.818  0.058  0.268 -0.485  0.084  0.748 -0.188  0.240   \n",
       "2886 -0.219 -0.282 -0.596  0.133 -0.051 -0.344 -0.049  0.555 -0.497 -0.395   \n",
       "2887  0.368  0.380  0.244  0.208 -0.042 -0.225 -0.049  0.388 -0.252 -0.419   \n",
       "\n",
       "         V5    V24    V13    V19    V18    V30    V22  \n",
       "0    -0.901  0.800  0.550 -0.991 -0.573  0.109 -0.063  \n",
       "1    -0.893  0.801  1.109 -0.836 -0.571  0.124 -0.063  \n",
       "2    -0.797  0.961  0.767 -0.558 -0.564  0.361 -0.063  \n",
       "3    -0.679  1.435  0.769 -0.564 -0.574  0.417 -0.063  \n",
       "4    -0.454  0.881 -0.349 -0.394 -0.572  1.078 -0.259  \n",
       "...     ...    ...    ...    ...    ...    ...    ...  \n",
       "2883 -0.212 -1.310  0.141  1.003  0.454 -0.208  0.314  \n",
       "2884 -0.237 -1.314 -0.634  1.076  0.808 -0.287  0.314  \n",
       "2885  0.034 -1.310  0.389  1.042  0.813 -0.179  0.133  \n",
       "2886 -0.140 -1.313 -0.310  1.129  0.803  1.061  0.019  \n",
       "2887  0.092 -1.314 -0.114  0.899  0.247  0.057 -0.063  \n",
       "\n",
       "[2886 rows x 17 columns]"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "features = train1.iloc[:,:-1]\n",
    "targets = train1.iloc[:,-1]\n",
    "features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       0.175\n",
       "1       0.676\n",
       "2       0.633\n",
       "3       0.206\n",
       "4       0.384\n",
       "        ...  \n",
       "2883    0.235\n",
       "2884    1.042\n",
       "2885    0.005\n",
       "2886    0.350\n",
       "2887    0.417\n",
       "Name: target, Length: 2886, dtype: float64"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "targets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "xtrain,xtest,ytrain,ytest = train_test_split(features,targets,test_size=0.3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((2020, 17), (2020,))"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xtrain.shape,ytrain.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1.0, 0.7361205329854774)"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#回归树\n",
    "m1 = DecisionTreeRegressor()\n",
    "m1.fit(xtrain,ytrain)\n",
    "m1.score(xtrain,ytrain,scoring='mean'),m1.score(xtest,ytest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.9802479874837025, 0.8799845063216539)"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#随机森林\n",
    "m2 = RandomForestRegressor()\n",
    "m2.fit(xtrain,ytrain)\n",
    "m2.score(xtrain,ytrain),m2.score(xtest,ytest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.9223763846895408, 0.8820408501130245)"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#GBDT\n",
    "m3 = GradientBoostingRegressor()\n",
    "m3.fit(xtrain,ytrain)\n",
    "m3.score(xtrain,ytrain),m3.score(xtest,ytest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.9985160273125366, 0.8559352920911497)"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#xgboost\n",
    "from xgboost import XGBRegressor,XGBRFRegressor\n",
    "m4 = XGBRegressor()\n",
    "m4.fit(xtrain,ytrain)\n",
    "m4.score(xtrain,ytrain),m4.score(xtest,ytest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.9129127787956588, 0.8704789076788939)"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "m5 = XGBRFRegressor()\n",
    "m5.fit(xtrain,ytrain)\n",
    "m5.score(xtrain,ytrain),m5.score(xtest,ytest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "m3_min -0.28054713804923925\n",
      "m3_max -0.06999981288448114\n",
      "m3_mean -0.13963680429148168\n",
      "\n",
      "m5_min -0.289549912439088\n",
      "m5_max -0.07537847627027526\n",
      "m5_mean -0.14996637910560562\n"
     ]
    }
   ],
   "source": [
    "#对表现较好的m3,m5进行调参\n",
    "#交叉验证\n",
    "x_m3 = cross_val_score(m3,features,targets,cv=10,scoring='neg_mean_squared_error')\n",
    "x_m5 = cross_val_score(m5,features,targets,cv=10,scoring='neg_mean_squared_error')\n",
    "x_m3_min = x_m3.min()\n",
    "x_m3_max = x_m3.max()\n",
    "x_m3_mean = x_m3.sum()/10\n",
    "x_m5_min = x_m5.min()\n",
    "x_m5_max = x_m5.max()\n",
    "x_m5_mean = x_m5.sum()/10\n",
    "print('m3_min',x_m3_min)\n",
    "print('m3_max',x_m3_max)\n",
    "print('m3_mean',x_m3_mean)\n",
    "print('\\nm5_min',x_m5_min)\n",
    "print('m5_max',x_m5_max)\n",
    "print('m5_mean',x_m5_mean)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',\n",
       "                          init=None, learning_rate=0.1, loss='ls', max_depth=3,\n",
       "                          max_features=None, max_leaf_nodes=None,\n",
       "                          min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "                          min_samples_leaf=1, min_samples_split=2,\n",
       "                          min_weight_fraction_leaf=0.0, n_estimators=100,\n",
       "                          n_iter_no_change=None, presort='deprecated',\n",
       "                          random_state=None, subsample=1.0, tol=0.0001,\n",
       "                          validation_fraction=0.1, verbose=0, warm_start=False)"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "m3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',\n",
       "                           init=None, learning_rate=0.1, loss='ls', max_depth=3,\n",
       "                           max_features=None, max_leaf_nodes=None,\n",
       "                           min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "                           min_samples_leaf=1, min_samples_split=2,\n",
       "                           min_weight_fraction_leaf=0.0, n_estimators=100,\n",
       "                           n_iter_no_change=None, presort='deprecated',\n",
       "                           random_state=None, subsample=1.0, tol=0.0001,\n",
       "                           validation_fraction=0.1, verbose=0, warm_start=False),\n",
       " -0.1269536108006453)"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "paras_test1 = {\n",
    "    'max_depth': range(2,8,1)\n",
    "}\n",
    "g1 = GridSearchCV(estimator=m3,param_grid=paras_test1,scoring='neg_mean_squared_error',cv=10)\n",
    "g1.fit(xtrain,ytrain)\n",
    "g1.best_estimator_,g1.best_score_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
